# Mailto: julio.cordero@medma.uni-heidelberg.de
# - Instead of gene expression, we propose an enrichment score that combines the motif score of each TF with the log10 p-value of the motif in the chromatin state. Each cluster identified represents a group of peaks that contains a specific signature of TF motifs.
# - It allows the identification of TFs in the neighborhood zone, in order to understand the mechanism by which TFs cooperate with one another.
# - This script was used with the motifs associated with histone marks. However, it could also be modified to take motifs associated with ATAC-seq or ChIP-seq as input. In the case of ATAC-seq, motifs coming from a footprinting analysis or a HOMER motif search are highly recommended.
# Working directory: the relative paths used below are resolved against it.
workdir <- "./"
setwd(workdir)

# Output folder; it is used as the prefix of every PDF file name built below.
# Creation is guarded so that re-running the script does not raise an
# "already exists" warning.
PTHA1 <- "../03OUTPUT/"
if (!dir.exists(PTHA1)) {
  dir.create(PTHA1, recursive = TRUE)
}
PORT <- PTHA1

# Name fragments (TRY and TRY1 are not used in the visible part of the script;
# PROJECT is pasted into output file names and plot titles).
TRY <- "_"
TRY1 <- "02Ia_"
PROJECT <- "03b_TFinZONE_at_04dpci"

# Presumably the significance cutoff -- it is not used in the visible part of
# the script; confirm against the later sections.
PVALUE <- 0.05
#### Colours for the volcano plot ####
upcol <- "#B2182B"    # dark red
nc <- "#000000"       # black
downcol <- "grey"     # grey
# Three-step scale: down colour, near-white midpoint, up colour.
CC <- c(downcol, "#F7F7F7", upcol)
BASIC_COL <- "black"  # text colour used in the plot themes below

# Two-colour scale handed to FeaturePlot() further down.
CO3 <- c("lightgrey", "#B2182B")
# Palette handed to DimPlot() when points are coloured by GROUP.
CO_ALU3 <- c(
  "#d7191c", "#d8b365", "#542788", "grey",
  "#91bfdb", "grey", "grey"
)

# Column names for motif result tables (not used in the visible part of the
# script; presumably applied to imported motif tables -- confirm).
NAME33 <- c(
  "Mo_Name", "Consensus", "P-value", "Log_P-value", "q-value_Benja",
  "Nor_of_Tar", "Per_of_Tar", "No_of_Tar_Backg", "Per_of_Tar_Backg"
)
NAME34 <- c("HOmer_NAME", NAME33)

mat_m <- data.frame("mask_500_FIND")
## Path to the folder that holds one motif-search result entry per group.
WORK1 <- "../01DATA_ORI/08MO_onP/"
#WORK1="Y:/002ZF/000FIGURES_ANALY/00GITHUB_01HIS_Chroma_Factors/01HIS_Chrom_Factors_TEST/04MOtifs_Analy_as_SC_MOAC_Fig3_5/02Fig4_MO_MOAC_14dpci/14dATA_ORI/08MOTIFS_on_PEAKS/"

# `pattern` is a regular expression, not a shell glob: the former "*_FIND"
# only worked because the regex engine tolerates a leading "*". Match the
# literal substring instead.
NAME_G2 <- data.frame(list.files(path = WORK1, pattern = "_FIND"))
colnames(NAME_G2) <- "ENH4"
#NAME_G2$GROUP <- gsub("CORR1_14d_", "", NAME_G2$ENH4)
# Strip the fixed suffix to obtain the group label (literal match, no regex).
NAME_G2$GROUP <- gsub("_mask_500_FIND", "", NAME_G2$ENH4, fixed = TRUE)

### this is important to check the SYMBOL
NAME_G22 <- data.frame(c("00A_Ia", "01Ia_A", "02U_Ea", "03U_Ia", "04U_Pa"))
# Group labels as a plain character vector.
NAME_G2c <- as.character(t(NAME_G2[, 2]))
#WORK11A11="../14dATA_ORI/07aMO_forMOAC/08TSS_Enh_w_14dpci_fHeatmap_ALL.xlsx"
# Excel workbook to import; its first column is renamed to SYMBOL below.
WORK11A11 <- "../01DATA_ORI/07aMO_forSeurat_ANALY/08TSS_Enh_w_04dpci_fHeatmap_ALL.xlsx"
MAT_1A <- read_excel(WORK11A11)
#MAT_1A= read.delim(WORK11A11)
names(MAT_1A)[1] <- "SYMBOL"
dim(MAT_1A)
## [1] 338 11
##### Set resolution for CLustering
# RES is passed to FindClusters() as `resolution`.
RES <- 0.5
## sequential:
## - args: function (..., workers = 16, envir = parent.frame())
## - tweaked: TRUE
## - call: plan("sequential", workers = 16)
##
## 00A_Ia_04dcpi 01Ia_A_04dcpi 02U_Ea_04dcpi 03U_Ia_04dcpi 04U_Pa_04dcpi
## 39886 152129 14945 26201 37650
## [1] 3
# Metadata table for the Seurat object; its PositionID2 column becomes the row
# names so that metadata rows can be matched to matrix columns by name.
b7 <- AC_RE_VOL_SC_META
rownames(b7) <- b7$PositionID2

# Keep exactly the matrix columns named after the metadata rows (same order)
# and label the matrix rows with the SYMBOL column.
mat_c1 <- mat_cm_TMM_BOTH_SC[, rownames(b7)]
rownames(mat_c1) <- mat_cm_TMM_BOTH_SC$SYMBOL
dim(mat_c1)
## [1] 230 4640
dim(b7)
## [1] 4640 5
smartseq2 <- CreateSeuratObject(
  counts = mat_c1,
  project = "04d",
  meta.data = b7
)
## png
## 2
# Side-by-side QC panels without a legend (EC00, EC001, plot1 and plot2 are
# built outside the visible part of the script).
arrangeQC_TF <- ggarrange(
  EC00, EC001,
  ncol = 2, nrow = 1,
  common.legend = TRUE,
  align = "hv",
  legend = "none"
)
print(arrangeQC_TF)
grid.arrange(plot1, plot2, ncol = 1, nrow = 2)
## Centering and scaling data matrix
## PC_ 1
## Positive: RARa, Twist2, Sox3, Sox10, Olig2, TCF4, Smad3, BHLHA15, Bapx1, SCL
## Sox21, Smad2, ERG, Smad4, Sox6, Atoh1, PR, Erra, Sox4, TEAD3
## Tcf21, Tbet, EAR2, Zac1, SPDEF, TEAD1, ETS1, EHF, Sox2, EWS
## Negative: Hoxa13, Foxo1, Nanog, FOXK1, Foxf1, Hoxd13, FOXA1, Hoxa11, FoxL2, Foxo3
## FOXM1, Foxa2, Hoxd11, FOXK2, FoxD3, FOXP1, HOXB13, MYB, Meis1, Fox
## HLF, NFIL3, Foxa3, GSC, GATA3, bHLHE41, Mef2b, Hoxa9, Npas4, Hoxd10
## PC_ 2
## Positive: Hoxa9, Gata4, Tbx5, Gata6, Hoxd10, NPAS, HIC1, Gata2, Gata1, Hnf6b
## Hoxc9, MNT, Mef2c, Hoxd12, Hoxa10, Gfi1b, Cux2, MITF, Mef2a, MafA
## NFAT, LRF, Max, Mef2b, NPAS2, Fosl2, GATA3, ZNF7, STAT4, Tbx20
## Negative: Foxf1, FOXK1, Foxo3, FoxL2, FOXP1, Foxo1, Fox, Foxa2, FOXA1, Eomes
## FOXK2, BMYB, MYB, Npas4, SCL, Elk4, ELF1, Foxa3, FOXM1, AMYB
## Elk1, Tgif2, ETS, ETV4, Tgif1, Fli1, FoxD3, Foxh1, Tbr1, Ap4
## PC_ 3
## Positive: ETV1, ETV4, Elf4, Etv2, Fli1, ETS1, ELF3, ERG, EHF, ELF5
## EWS, ELF1, GABPA, SPDEF, Elk1, Hoxa9, ETS, Tbx5, Elk4, Hnf6b
## Nanog, Hoxd10, Hoxa10, Cux2, SpiB, Gfi1b, LRF, STAT4, PRDM1, GATA
## Negative: Tgif1, SCL, Tgif2, Hoxa11, TRPS1, Hoxd11, Sox6, RUNX2, Sox3, Olig2
## RUNX, RUNX1, THRb, Sox17, RARa, ERRg, TEAD, Pitx1, Tbr1, TEAD1
## Sox10, Foxo3, TEAD4, CDX4, TCF4, TEAD2, Six2, NPAS2, Sox9, TEAD3
## PC_ 4
## Positive: Nanog, Hoxa9, Tbx5, Hnf6b, Cux2, Gfi1b, Hoxd10, Hoxa10, Tbx21, STAT4
## LRF, Hoxd13, Ap4, Eomes, MYB, Stat3, PRDM15, PRDM1, Twist2, Tcf21
## BHLHA15, Rfx6, Cdx2, TCF4, Bcl6, Atoh1, Bapx1, ZBTB18, Npas4, HOXB13
## Negative: NPAS2, MNT, NPAS, Gata2, Gata6, GATA3, Gata4, Max, Gata1, BMAL1
## USF1, TRPS1, Tgif1, Hoxa11, Hoxd11, MITF, CLOCK, Fra1, NFAT, THRb
## HIC1, EBF1, MafF, GRHL2, Tcf7, ERRg, Usf2, ETV1, GABPA, GSC
## PC_ 5
## Positive: Hoxd11, Hoxd12, Hoxa11, ZNF7, GLIS3, NFAT, Tgif2, Fra2, ELF1, Elk4
## Tgif1, Hoxa13, MafB, Elk1, Hand2, ETS, Fli1, ETV4, NF1, ETV1
## Sox17, HOXB13, Unknown, Sox6, PR, Six2, CDX4, Fos, Mef2b, GABPA
## Negative: Hoxa9, Tbx5, CLOCK, Hnf6b, bHLHE40, Max, NPAS, BMAL1, Cux2, USF1
## FOXM1, BHLHA15, Hoxa10, MITF, Nanog, Gfi1b, Foxa2, Twist2, Foxa3, TCF4
## Atoh1, Tcf21, FOXA1, FoxD3, FOXP1, GATA3, Usf2, Olig2, FoxL2, Foxf1
# Show the feature loadings of dimensions 1 to 4 of the "pca" reduction.
VizDimLoadings(smartseq2, dims = 1:4, reduction = "pca")
# P1pca and P2 are built outside the visible part of the script; they are only
# drawn here.
grid.arrange( P1pca, ncol = 1,nrow = 1)
print(P2)
## png
## 2
# Fix the RNG state before the graph-based clustering.
set.seed(1)
# Neighbour graph on dimensions 1:10, then community detection at resolution
# RES. algorithm = 1 selects Louvain (see the recorded output below).
smartseq2 <- FindNeighbors(object = smartseq2, dims = 1:10)
smartseq2 <- FindClusters(
  object = smartseq2,
  resolution = RES,
  algorithm = 1,
  random.seed = 1
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4640
## Number of edges: 143918
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8978
## Number of communities: 12
## Elapsed time: 0 seconds
# ColorBrewer palettes.
CO <- brewer.pal(n = 12, name = "Paired")
COb <- brewer.pal(n = 8, name = "Dark2")

# Small three-colour sets.
CO2b <- c("#E66100", "#000000", "#0affc2")
CO2 <- c("#b2182b", "#ef8a62", "#67a9cf")

## Hand-edited palette; used below to colour the clusters.
# NOTE(review): "#332288" appears twice (2nd and 14th entry), so two clusters
# would share a colour once more than 13 clusters are present.
COe2 <- c(
  "#88CCEE", "#332288", "#AA4499", "#44AA99", "#999933", "#882255",
  "#661100", "#6699CC", "#888888", "#000000", "#77ddcc", "#df65b0",
  "#CC79A7", "#332288", "#E66100", "#1F78B4", "#B2DF8A", "#33A02C",
  "#FB9A99", "#A6CEE3", "#FDBF6F", "#FF7F00", "#CAB2D6", "#F0E442",
  "#B15928", "#7570B3", "#E7298A", "#666666", "#c241f8",
  "mediumaquamarine", "#882E72", "#1965B0", "#bf812d"
)

# Five three-step ramps, concatenated into COe at the end of this block.
A1 <- c("#b2182b", "#d6614d", "#f4a582")
A2 <- c("#8c510a", "#bf812d", "#F6C141")
A3 <- c("#882E72", "#AE76A3", "#D1BBD7")
A4 <- c("#01665e", "#90C987", "#CAE0AB")
A5 <- c("#1965B0", "#4393c3", "#92c5de")

# Palettes for colouring by GROUP. CO_ALU3 repeats the value it was already
# given near the top of the script.
CO_ALU2 <- c("#d7191c", "#d8b365", "#542788", "#abd9e9", "#2c7bb6", "#4d4d4d", "grey")
CO_ALU <- c("#d7191c", "#542788", "#d8b365", "#abd9e9", "#2c7bb6", "#4d4d4d", "grey")
CO_ALU3 <- c("#d7191c", "#d8b365", "#542788", "grey", "#91bfdb", "grey", "grey")

COe <- c(A1, A2, A3, A4, A5)
# Two-dimensional embeddings used by every DimPlot()/FeaturePlot() call below.
# Note that the two embeddings use different dimension ranges (1:30 vs 1:20).
smartseq2 <- RunUMAP(
  object = smartseq2,
  dims = 1:30,
  n.neighbors = 30
)
smartseq2 <- RunTSNE(
  object = smartseq2,
  dims = 1:20,
  check_duplicates = FALSE
)
##### These PLots are printed later in the Script. here are to check #####
#DimPlot(smartseq2, reduction = "umap", label = T,cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne", label = T,cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "umap",group.by ='GROUP',cols = CO_ALU,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP',cols = COe,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne", label = T)
#DimPlot(smartseq2, reduction = "tsne",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
#DimPlot(smartseq2, reduction = "umap",group.by ='GROUP',cols = CO_ALU3,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP',cols = CO_ALU3,pt.size=0.7)
{
  # Cluster-coloured overview of both embeddings, written to one PDF.
  pdf(
    file = paste0(PTHA1, "01Umap_tsne_", PROJECT, "CLU_PC10_Dim30", ".pdf"),
    width = 8, height = 3
  )

  # UMAP and t-SNE, each with and without cluster labels.
  P00a <- DimPlot(smartseq2, reduction = "umap", label = TRUE, cols = COe2, pt.size = 0.7)
  P01a <- DimPlot(smartseq2, reduction = "umap", label = FALSE, cols = COe2, pt.size = 0.7)
  P00at <- DimPlot(smartseq2, reduction = "tsne", label = TRUE, cols = COe2, pt.size = 0.7)
  P01at <- DimPlot(smartseq2, reduction = "tsne", label = FALSE, cols = COe2, pt.size = 0.7)

  # Split views: clusters split by GROUP, and GROUP split by cluster.
  # P03a, P03t and P03at are not drawn here; the next PDF block uses them.
  P03 <- DimPlot(
    smartseq2, reduction = "umap",
    group.by = "seurat_clusters", split.by = "GROUP",
    cols = COe2, pt.size = 0.7
  )
  P03a <- DimPlot(
    smartseq2, reduction = "umap",
    group.by = "GROUP", split.by = "seurat_clusters",
    cols = CO_ALU3, pt.size = 0.7
  )
  P03t <- DimPlot(
    smartseq2, reduction = "tsne",
    group.by = "seurat_clusters", split.by = "GROUP",
    cols = COe2, pt.size = 0.7
  )
  P03at <- DimPlot(
    smartseq2, reduction = "tsne",
    group.by = "GROUP", split.by = "seurat_clusters",
    cols = CO_ALU3, pt.size = 0.7
  )

  arrange1 <- ggarrange(
    P00a, P01a,
    ncol = 2, nrow = 1,
    common.legend = TRUE, align = "hv", legend = "right"
  )
  arrange1t <- ggarrange(
    P00at, P01at,
    ncol = 2, nrow = 1,
    common.legend = TRUE, align = "hv", legend = "right"
  )
  # arrange1c is built here but first printed by the next PDF block.
  arrange1c <- ggarrange(
    P03,
    ncol = 1, nrow = 1,
    common.legend = TRUE, align = "hv", legend = "right"
  )

  # Page order in the PDF: t-SNE first, then UMAP.
  print(arrange1t)
  print(arrange1)
}
dev.off()
## png
## 2
{
  # Split-view embeddings written to a second, wider PDF.
  pdf(
    file = paste0(PTHA1, "01Umap_tsne_", PROJECT, "CLU_PC10_Dim30_by_STAGE", ".pdf"),
    width = 12, height = 3
  )

  arrange1bt <- ggarrange(
    P03t,
    ncol = 1, nrow = 1,
    common.legend = TRUE, align = "hv", legend = "right"
  )
  # NOTE(review): arrange1ca is built but never printed in the visible code;
  # arrange1c (built in the previous PDF block) is printed instead. Confirm
  # which of the two was intended for this file.
  arrange1ca <- ggarrange(
    P03a,
    ncol = 1, nrow = 1,
    common.legend = TRUE, align = "hv", legend = "right"
  )
  arrange1cat <- ggarrange(
    P03at,
    ncol = 1, nrow = 1,
    common.legend = TRUE, align = "hv", legend = "right"
  )

  print(arrange1bt)
  print(arrange1c)
  print(arrange1cat)
}
dev.off()
## png
## 2
# Draw the embedding figures again on the active device (the PDF devices
# opened above have already been closed).
print(arrange1t)
print(arrange1)
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
## png
## 2
# The objects below are built outside the visible part of the script; they
# are only drawn here.
print(arrange3)
print(arrange3t)
print(arrange2)
grid.arrange( P20,P201, ncol = 1,nrow = 2)
grid.arrange( P05ah_bino1, ncol = 1,nrow = 1)
grid.arrange( P05ah_bino, ncol = 1,nrow = 1)
##Save Seurat object - Important for further modification of the plots. You do not need to start from the start of the script.
# Pull the RNA assay out of the Seurat object. (Two earlier assignments to
# DATA_SC were removed: they were identical, and their value was overwritten
# by this statement before ever being read.)
DATA_SC <- GetAssay(smartseq2, assay = "RNA")

# Scaled data as a data frame. DATA_SC2 keeps the original column names;
# DATA_SC1 gets its columns relabelled below.
DATA_SC1 <- data.frame(DATA_SC@scale.data)
DATA_SC2 <- DATA_SC1
#dim(DATA_SC1)

META_SEU <- data.frame(smartseq2@meta.data)
# NOTE(review): the metadata was keyed by a column called PositionID2 when the
# Seurat object was built. If no column is named exactly PositionID, `$` on a
# data frame partially matches another column starting with that name --
# confirm this is the intended column.
colnames(DATA_SC1) <- META_SEU$PositionID
colnames(META_SEU)[4] <- "SYMBOL_TF"
# TF sets shown in the stacked violin plots and feature plots below.
GENES001 <- c("Twist2", "Smad4", "Nanog", "Smad2", "Fos", "Sox9", "Sox10", "JunB")
# NOTE(review): "Mef2b" is listed twice in GENES002 -- confirm whether another
# factor was intended for the last entry.
GENES002 <- c("Pitx1", "Isl1", "Mef2b", "FOXA1", "TEAD3", "GATA3", "Gata6", "Mef2b")
GENES003 <- c("Eomes", "Oct4", "Foxh1", "CUX1", "Bapx1", "Elk4", "Foxo1", "Fli1")
# GENES004 is not used in the visible part of the script.
GENES004 <- c("Rbp7", "Rgcc", "Fabp4", "Egfl7", "Flt1", "Cd36", "Nrp1", "Tpm1", "Rbp1")

# Font sizes and axis labels shared by the violin plots.
LE <- 10
SI <- 10
YL3 <- "Norm binding of TFs"
XL3 <- "clusters"

# Build one stacked, flipped violin plot of `features` per cluster, with the
# shared theme, fill palette, title and axis labels.
stacked_tf_violin <- function(features) {
  VlnPlot(
    smartseq2, features,
    stack = TRUE, sort = FALSE, flip = TRUE, pt.size = 0.02
  ) +
    geom_point(size = .4, colour = "darkgrey") +
    scale_fill_manual(values = COe2) +
    theme(
      legend.position = "none",
      plot.title = element_text(color = BASIC_COL, size = LE, face = "bold"),
      axis.text.y = element_text(size = LE, face = "bold"),
      axis.text.x = element_text(size = SI, face = "bold"),
      axis.title.x = element_text(color = BASIC_COL, size = LE, face = "bold"),
      axis.title.y = element_text(color = BASIC_COL, size = LE, face = "bold")
    ) +
    labs(title = paste0(PROJECT, "SEVERAL_TFs"), x = XL3, y = YL3)
}

P1 <- stacked_tf_violin(GENES001)
P1CM <- stacked_tf_violin(GENES002)
P1CM3 <- stacked_tf_violin(GENES003)
#P1CM3
#print(P1CM)
# Write the stacked violin plots to one PDF. The two arranged figures used to
# be built while the device was open but never printed into it, so the file
# only contained P1; they are now printed before the device is closed.
pdf(
  file = paste0(PTHA1, "09_", PROJECT, "_CLU_vs_Ex_VIol_groups_STACKED", ".pdf"),
  width = 10, height = 15
)
print(P1)
arrange1CM <- ggarrange(
  P1CM,
  ncol = 1, nrow = 1,
  common.legend = TRUE, align = "hv", legend = "top"
)
print(arrange1CM)
arrange1CM3 <- ggarrange(
  P1CM3,
  ncol = 1, nrow = 1,
  common.legend = TRUE, align = "hv", legend = "top"
)
print(arrange1CM3)
dev.off()
## png
## 2
# Draw the arranged violin figures on the active device.
print(arrange1CM)
print(arrange1CM3)
## png
## 2
# The objects below are built outside the visible part of the script; they
# are only drawn here.
print(arrange1CM41)
print(arrange1CM42)
print(arrange1CM43)
## png
## 2
print(arrange12)
print(P04b12)
print(P04c12)
#FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3)
# Feature plots of the three TF sets on the default embedding, with the colour
# scale clipped to [0, 1] and drawn from CO3.
P05a <- FeaturePlot(
  smartseq2,
  features = GENES001,
  min.cutoff = 0, max.cutoff = 1,
  cols = CO3
)
P05b <- FeaturePlot(
  smartseq2,
  features = GENES002,
  min.cutoff = 0, max.cutoff = 1,
  cols = CO3
)
P05c <- FeaturePlot(
  smartseq2,
  features = GENES003,
  min.cutoff = 0, max.cutoff = 1,
  cols = CO3
)
{
  # One page per TF set.
  pdf(
    file = paste0(PTHA1, "09_", PROJECT, "_CLU_vs_Exp_UMMAP_groups", ".pdf"),
    width = 8, height = 6
  )
  print(P05a)
  print(P05b)
  print(P05c)
}
dev.off()
## png
## 2
# Same feature plots as above, drawn on the t-SNE embedding.
P05a1 <- FeaturePlot(
  smartseq2,
  features = GENES001,
  min.cutoff = 0, max.cutoff = 1,
  cols = CO3,
  reduction = "tsne"
)
P05b1 <- FeaturePlot(
  smartseq2,
  features = GENES002,
  min.cutoff = 0, max.cutoff = 1,
  cols = CO3,
  reduction = "tsne"
)
P05c1 <- FeaturePlot(
  smartseq2,
  features = GENES003,
  min.cutoff = 0, max.cutoff = 1,
  cols = CO3,
  reduction = "tsne"
)
{
  # One page per TF set.
  pdf(
    file = paste0(PTHA1, "09_", PROJECT, "_CLU_vs_Ex_TSNE_groups", ".pdf"),
    width = 8, height = 6
  )
  print(P05a1)
  print(P05b1)
  print(P05c1)
}
dev.off()
## png
## 2
# Draw the feature plots again on the active device: first the three plots on
# the default embedding, then the three t-SNE plots.
print(P05a)
print(P05b)
print(P05c)
print(P05a1)
print(P05b1)
print(P05c1)